# Dataset found at https://www.kaggle.com/datasets/maartenvandevelde/marine-litter-watch-19502021
# Importing libraries
import pandas as pd
import numpy as np
from datetime import date
import plotly.express as px
import plotly.graph_objects as go
# Removes the 'SettingWithCopyWarning' that will show when running code later on in the project
pd.options.mode.chained_assignment = None
# Expand the width of the Jupyter notebook
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
# Importing datasets
marine_data = pd.read_csv(r"C:\Users\jksmi\Documents\Marine_Litter\MLW_Data.csv",encoding='iso-8859-1')
marine_meta = pd.read_csv(r"C:\Users\jksmi\Documents\Marine_Litter\MLW_Meta.csv")
# View marine_data dataframe
marine_data
| CommunityName | BeachName | BeachCountrycode | BeachRegionalSea | BeachLength_m | BeachLocation | BeachType | EventDate | EventType | NatRef | ... | G202 | G203 | G204 | G205 | G206 | G207 | G208 | G210 | G211 | G213 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | gBqsPxAZ | krapetz | BG | Black Sea | 5850 | Rural | Sandy | 20160310 | Cleanup | NaN | ... | 2.0 | 4.0 | NaN | 3.0 | NaN | NaN | 3.0 | NaN | 1.0 | NaN |
| 1 | gBqsPxAZ | krapetz | BG | Black Sea | 5850 | Rural | Sandy | 20150902 | Cleanup | NaN | ... | 1.0 | NaN | 2.0 | NaN | NaN | NaN | 7.0 | 3.0 | NaN | NaN |
| 2 | gBqsPxAZ | krapetz | BG | Black Sea | 5850 | Rural | Sandy | 20151203 | Cleanup | NaN | ... | NaN | NaN | 5.0 | NaN | NaN | NaN | 2.0 | NaN | 2.0 | 1.0 |
| 3 | gBqsPxAZ | krapetz | BG | Black Sea | 5850 | Rural | Sandy | 20151008 | Cleanup | NaN | ... | 5.0 | NaN | 2.0 | NaN | NaN | NaN | 5.0 | 26.0 | NaN | NaN |
| 4 | Legambiente Onlus | San Mauro Cilento (SA) | IT | Unknown | 100 | NaN | NaN | 20180428 | Monitoring | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | NaN | 2.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3408 | Legambiente Onlus | Spiaggia di Marina di Montemarciano, Ancona (AN) | IT | Mediterranean Sea | 100 | NaN | NaN | 20160505 | Monitoring | NaN | ... | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN |
| 3409 | Surfrider Foundation Europe | La Lagune, 33260 La Teste de Buch, FRANCE | FR | North-east Atlantic Ocean | 548 | NaN | NaN | 20180602 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN |
| 3410 | Surfers Against Sewage | Bournemouth beach | UK | North-east Atlantic Ocean | 2302 | Urban | Sandy | 20150328 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 8.0 | NaN | NaN | NaN |
| 3411 | ocean-R-evolution.com | cleanuptravels.com | CH | NaN | 22 | Rural | Sandy | 20170315 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN |
| 3412 | Marnoba | Las Salinas de Cabo de Gata, Almería | ES | Mediterranean Sea | 100 | Near river mouth | Sandy | 20130923 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3413 rows × 176 columns
# View marine_meta dataframe
marine_meta
| generalcode | category | generalname | |
|---|---|---|---|
| 0 | G1 | Plastic | 4/6-pack yokes, six-pack rings |
| 1 | G3 | Plastic | Shopping Bags incl. pieces |
| 2 | G4 | Plastic | Small plastic bags, e.g. freezer bags incl. pi... |
| 3 | G5 | Plastic | Plastic bags collective role; what remains fro... |
| 4 | G7 | Plastic | Drink bottles <=0.5l |
| ... | ... | ... | ... |
| 159 | G210 | Glass/ceramics | Other glass items |
| 160 | G211 | unidentified | Other medical items (swabs, bandaging, adhesiv... |
| 161 | G213 | Chemicals | Paraffin/Wax |
| 162 | G301 | Cloth/textile | Disposable mask |
| 163 | G302 | Cloth/textile | Disposable glove |
164 rows × 3 columns
# Checking for duplicates in the marine_meta dataset
marine_meta[marine_meta.duplicated()]
| generalcode | category | generalname |
|---|
# No duplicates found in the marine_meta dataset
# Find duplicates in marine data
marine_data[marine_data.duplicated()]
| CommunityName | BeachName | BeachCountrycode | BeachRegionalSea | BeachLength_m | BeachLocation | BeachType | EventDate | EventType | NatRef | ... | G202 | G203 | G204 | G205 | G206 | G207 | G208 | G210 | G211 | G213 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 390 | Swiss Litter Report | Murg_Waengi_StrickerR | CH | NaN | 13 | Urban | Pebbels | 20171019 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 439 | Surfrider Foundation Europe | Avenue Salvador Allende, 13500 Martigues, France | FR | Mediterranean Sea | 81 | NaN | NaN | 20150531 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 100.0 | NaN | NaN | NaN |
| 839 | Surfrider Foundation Europe | Viale Nettuno Lido di Metaponto, 75012 Lido di... | IT | Mediterranean Sea | 86 | NaN | NaN | 20150426 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 100.0 | NaN | NaN | NaN |
| 1400 | Surfrider Foundation Europe | 6178 Avenue de la Plage, 62231 Sangatte, France | FR | North-east Atlantic Ocean | 69 | NaN | NaN | 20160319 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 35.0 | NaN | NaN | NaN |
| 1998 | Surfrider Foundation Europe | via luigi sturzo, marina di Gioiosa Jonica RC,... | IT | Mediterranean Sea | 89 | NaN | NaN | 20150322 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 194.0 | NaN | NaN | NaN |
| 2203 | Surfrider Foundation Europe | 220-462 Route Departementale 940, 62200 Boulog... | FR | North-east Atlantic Ocean | 70 | NaN | NaN | 20150326 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 4.0 | NaN | NaN | NaN |
| 2298 | Surfrider Foundation Europe | AS-317 Muros de Nal?n, 33138 Muros de Nal?n, E... | ES | North-east Atlantic Ocean | 81 | NaN | NaN | 20150321 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2299 | Surfrider Foundation Europe | 15 Avenue de la Grande Plage, 40510 Seignosse,... | FR | North-east Atlantic Ocean | 80 | NaN | NaN | 20150308 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 20.0 | NaN | NaN | NaN |
| 2537 | Marnoba | Cala de Enmedio, Almería | ES | Mediterranean Sea | 100 | Rural | Rocky | 20160822 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2566 | Marnoba | El Saler, Valencia/València | ES | Mediterranean Sea | 100 | Near river mouth | Sandy | 20171114 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3007 | Surfrider Foundation Europe | Beg an Dorchen Plomeur, 29120 Plomeur, France | FR | North-east Atlantic Ocean | 74 | NaN | NaN | 20150405 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 150.0 | NaN | NaN | NaN |
| 3241 | Surfrider Foundation Europe | 32 Via Cartagine, 00058 Santa Marinella, Italie | IT | Mediterranean Sea | 83 | NaN | NaN | 20150322 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 30.0 | NaN | NaN | NaN |
12 rows × 176 columns
# Checking the shape of the marine_data dataframe before removing duplicates
marine_data.shape
(3413, 176)
# Dropping duplicate values in dataset
marine_data = marine_data.drop_duplicates(keep='first')
# Checking the shape of the marine_data dataframe after duplicates removed
marine_data.shape
(3401, 176)
# Checking datatypes of the non-numerical columns in the marine_data dataframe
marine_data.loc[:,'CommunityName':'NatRef'].dtypes
CommunityName object BeachName object BeachCountrycode object BeachRegionalSea object BeachLength_m int64 BeachLocation object BeachType object EventDate int64 EventType object NatRef object dtype: object
# Changing EventDate to datetime datatype (EventDate is currently int64)
marine_data['EventDate'] = pd.to_datetime(marine_data['EventDate'],format="%Y%m%d")
# Finding the number of NaN values in each column of the marine_data dataframe
print(marine_data.isna().sum().to_string())
CommunityName 0 BeachName 0 BeachCountrycode 249 BeachRegionalSea 1112 BeachLength_m 0 BeachLocation 1738 BeachType 1716 EventDate 0 EventType 0 NatRef 3287 lon_x1 0 lat_y1 0 lon_x2 0 lat_y2 0 G1 3015 G3 967 G4 1560 G5 2998 G7 903 G8 1357 G9 2861 G10 1087 G11 3006 G12 2953 G13 2263 G14 3164 G15 3261 G16 3134 G17 3081 G18 3126 G19 3230 G21 698 G22 2224 G23 2028 G24 2116 G25 2093 G26 1719 G27 740 G28 2377 G29 3110 G30 808 G31 1721 G32 1901 G33 1441 G34 1613 G35 1169 G36 3130 G37 2472 G40 3207 G41 3204 G42 3246 G43 3149 G44 3150 G45 2901 G46 3188 G47 3103 G49 2233 G50 1149 G52 2403 G53 2719 G54 3079 G56 2612 G57 3126 G58 3221 G59 2022 G60 3140 G62 3103 G63 3234 G64 3306 G65 2524 G66 2091 G67 1982 G68 3156 G69 3228 G70 2034 G71 2048 G72 3144 G73 2428 G76 2850 G77 3305 G79 1049 G80 2203 G82 1135 G83 2440 G84 3221 G85 3133 G86 3128 G87 2305 G88 3125 G89 2266 G90 2524 G91 2481 G92 3190 G93 2438 G95 1453 G96 2196 G97 3214 G98 3203 G99 2472 G100 1976 G101 2609 G102 3134 G124 1573 G125 1957 G126 2495 G127 3328 G128 2601 G129 3313 G130 3175 G131 2552 G132 3374 G133 2587 G134 2207 G137 2001 G138 2516 G139 3249 G140 3324 G141 3306 G142 2332 G143 3334 G144 2313 G145 1506 G147 2586 G148 3021 G150 3107 G151 2457 G152 2131 G153 2445 G154 2546 G155 2602 G156 2142 G158 1988 G159 2333 G160 2561 G161 2975 G162 3273 G163 3262 G164 3244 G165 2485 G166 3227 G167 2616 G171 2133 G172 2381 G174 3064 G175 1241 G176 2362 G177 2012 G178 1677 G179 2648 G180 3290 G181 3268 G182 2656 G184 3307 G186 3261 G187 3338 G188 2711 G189 3345 G190 3303 G191 2457 G193 3319 G194 2659 G195 2722 G198 1746 G199 2477 G200 1456 G201 3190 G202 3238 G203 2597 G204 2157 G205 3348 G206 3277 G207 3282 G208 1333 G210 2418 G211 2451 G213 2338
# Finding the totals of each unique value in the BeachRegionalSea column
marine_data['BeachRegionalSea'].value_counts(dropna=False)
NaN 1112 Mediterranean Sea 1000 North-east Atlantic Ocean 815 Black Sea 268 Unknown 110 Baltic Sea 96 Name: BeachRegionalSea, dtype: int64
# BeachCountrycode to BeachRegionalSea groupings and counts
print(marine_data[['BeachCountrycode','BeachRegionalSea']].value_counts(dropna=False,sort=False).to_string())
BeachCountrycode BeachRegionalSea
BA Mediterranean Sea 1
NaN 1
BE North-east Atlantic Ocean 4
BG Black Sea 74
CH Unknown 1
NaN 983
CV Unknown 4
CY Mediterranean Sea 8
DE Baltic Sea 5
NaN 84
DK Baltic Sea 69
North-east Atlantic Ocean 187
DZ Mediterranean Sea 2
EE Baltic Sea 2
EL Mediterranean Sea 1
ES Mediterranean Sea 194
North-east Atlantic Ocean 164
Unknown 5
FI Baltic Sea 1
FR Mediterranean Sea 64
North-east Atlantic Ocean 242
NaN 5
GE Black Sea 20
GR Mediterranean Sea 67
HR Mediterranean Sea 7
IE North-east Atlantic Ocean 42
IL Mediterranean Sea 3
IT Mediterranean Sea 497
Unknown 11
Int Mediterranean Sea 1
North-east Atlantic Ocean 2
NaN 20
LB Mediterranean Sea 1
MA Mediterranean Sea 1
NaN 2
ME Mediterranean Sea 1
MT Mediterranean Sea 2
NL North-east Atlantic Ocean 10
NO North-east Atlantic Ocean 2
PT North-east Atlantic Ocean 25
RO Black Sea 145
RU Baltic Sea 9
Black Sea 11
Unknown 1
SE Baltic Sea 2
North-east Atlantic Ocean 3
SI Mediterranean Sea 77
TN Mediterranean Sea 4
TR Black Sea 9
Mediterranean Sea 11
UA Black Sea 6
UK North-east Atlantic Ocean 55
ZA NaN 4
NaN Baltic Sea 8
Black Sea 3
Mediterranean Sea 58
North-east Atlantic Ocean 79
Unknown 88
NaN 13
# Handling NaN values in the BeachRegionalSea column
# Fill NaN values in BeachRegionalSea where BeachCountrycode equals 'BA' to 'Mediterranean Sea'
marine_data['BeachRegionalSea'].mask(marine_data['BeachCountrycode']=='BA','Mediterranean Sea',
inplace=True)
# Fill NaN values in BeachRegionalSea where BeachCountrycode equals 'CH' to 'Unknown'
marine_data['BeachRegionalSea'].mask(marine_data['BeachCountrycode']=='CH','Mediterranean Sea',
inplace=True)
# Fill NaN values in BeachRegionalSea where BeachCountrycode equals 'DE' to 'Baltic Sea'
marine_data['BeachRegionalSea'].mask(marine_data['BeachCountrycode']=='DE','Baltic Sea',
inplace=True)
# Fill NaN values in BeachRegionalSea where BeachCountrycode equals 'MA' to 'Mediterranean Sea'
marine_data['BeachRegionalSea'].mask(marine_data['BeachCountrycode']=='MA','Mediterranean Sea',
inplace=True)
# Fill remaining NaN values in BeachRegionalSea to 'Unknown'
marine_data['BeachRegionalSea'].replace(to_replace=np.NaN,value='Unknown', inplace=True)
marine_data['BeachRegionalSea'].value_counts(dropna=False)
Mediterranean Sea 1987 North-east Atlantic Ocean 815 Black Sea 268 Baltic Sea 180 Unknown 151 Name: BeachRegionalSea, dtype: int64
# BeachCountrycode to BeachRegionalSea groupings and counts (with BeachRegionalSea NaN values handled)
marine_data[['BeachCountrycode','BeachRegionalSea']].value_counts(dropna=False,sort=False)
BeachCountrycode BeachRegionalSea
BA Mediterranean Sea 2
BE North-east Atlantic Ocean 4
BG Black Sea 74
CH Mediterranean Sea 984
CV Unknown 4
CY Mediterranean Sea 8
DE Baltic Sea 89
DK Baltic Sea 69
North-east Atlantic Ocean 187
DZ Mediterranean Sea 2
EE Baltic Sea 2
EL Mediterranean Sea 1
ES Mediterranean Sea 194
North-east Atlantic Ocean 164
Unknown 5
FI Baltic Sea 1
FR Mediterranean Sea 64
North-east Atlantic Ocean 242
Unknown 5
GE Black Sea 20
GR Mediterranean Sea 67
HR Mediterranean Sea 7
IE North-east Atlantic Ocean 42
IL Mediterranean Sea 3
IT Mediterranean Sea 497
Unknown 11
Int Mediterranean Sea 1
North-east Atlantic Ocean 2
Unknown 20
LB Mediterranean Sea 1
MA Mediterranean Sea 3
ME Mediterranean Sea 1
MT Mediterranean Sea 2
NL North-east Atlantic Ocean 10
NO North-east Atlantic Ocean 2
PT North-east Atlantic Ocean 25
RO Black Sea 145
RU Baltic Sea 9
Black Sea 11
Unknown 1
SE Baltic Sea 2
North-east Atlantic Ocean 3
SI Mediterranean Sea 77
TN Mediterranean Sea 4
TR Black Sea 9
Mediterranean Sea 11
UA Black Sea 6
UK North-east Atlantic Ocean 55
ZA Unknown 4
NaN Baltic Sea 8
Black Sea 3
Mediterranean Sea 58
North-east Atlantic Ocean 79
Unknown 101
dtype: int64
marine_data.loc[:,'G1':'G213']
| G1 | G3 | G4 | G5 | G7 | G8 | G9 | G10 | G11 | G12 | ... | G202 | G203 | G204 | G205 | G206 | G207 | G208 | G210 | G211 | G213 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | NaN | 1.0 | NaN | 8.0 | 31.0 | 2.0 | 4.0 | NaN | 1.0 | ... | 2.0 | 4.0 | NaN | 3.0 | NaN | NaN | 3.0 | NaN | 1.0 | NaN |
| 1 | NaN | 12.0 | 1.0 | NaN | 10.0 | 9.0 | NaN | 2.0 | 3.0 | NaN | ... | 1.0 | NaN | 2.0 | NaN | NaN | NaN | 7.0 | 3.0 | NaN | NaN |
| 2 | NaN | 27.0 | 7.0 | NaN | 3.0 | 8.0 | 1.0 | 9.0 | NaN | 1.0 | ... | NaN | NaN | 5.0 | NaN | NaN | NaN | 2.0 | NaN | 2.0 | 1.0 |
| 3 | NaN | 15.0 | 60.0 | NaN | 18.0 | 46.0 | NaN | 9.0 | 10.0 | 5.0 | ... | 5.0 | NaN | 2.0 | NaN | NaN | NaN | 5.0 | 26.0 | NaN | NaN |
| 4 | NaN | 5.0 | 6.0 | NaN | 13.0 | 19.0 | 13.0 | 6.0 | 4.0 | 2.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | NaN | 2.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3408 | NaN | 1.0 | 23.0 | NaN | 16.0 | NaN | NaN | 1.0 | NaN | 1.0 | ... | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN |
| 3409 | NaN | 0.0 | 1.0 | NaN | 0.0 | 2.0 | NaN | 0.0 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN |
| 3410 | NaN | 5.0 | NaN | NaN | 11.0 | 1.0 | NaN | 28.0 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 8.0 | NaN | NaN | NaN |
| 3411 | NaN | NaN | 3.0 | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN |
| 3412 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3401 rows × 162 columns
marine_data.loc[:,'G1':'G213'] = marine_data.loc[:,'G1':'G213'].fillna(0)
marine_data.loc[:,'G1':'G213']
| G1 | G3 | G4 | G5 | G7 | G8 | G9 | G10 | G11 | G12 | ... | G202 | G203 | G204 | G205 | G206 | G207 | G208 | G210 | G211 | G213 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 1.0 | 0.0 | 8.0 | 31.0 | 2.0 | 4.0 | 0.0 | 1.0 | ... | 2.0 | 4.0 | 0.0 | 3.0 | 0.0 | 0.0 | 3.0 | 0.0 | 1.0 | 0.0 |
| 1 | 0.0 | 12.0 | 1.0 | 0.0 | 10.0 | 9.0 | 0.0 | 2.0 | 3.0 | 0.0 | ... | 1.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 7.0 | 3.0 | 0.0 | 0.0 |
| 2 | 0.0 | 27.0 | 7.0 | 0.0 | 3.0 | 8.0 | 1.0 | 9.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 5.0 | 0.0 | 0.0 | 0.0 | 2.0 | 0.0 | 2.0 | 1.0 |
| 3 | 0.0 | 15.0 | 60.0 | 0.0 | 18.0 | 46.0 | 0.0 | 9.0 | 10.0 | 5.0 | ... | 5.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 5.0 | 26.0 | 0.0 | 0.0 |
| 4 | 0.0 | 5.0 | 6.0 | 0.0 | 13.0 | 19.0 | 13.0 | 6.0 | 4.0 | 2.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | 0.0 | 2.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3408 | 0.0 | 1.0 | 23.0 | 0.0 | 16.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 |
| 3409 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
| 3410 | 0.0 | 5.0 | 0.0 | 0.0 | 11.0 | 1.0 | 0.0 | 28.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.0 | 0.0 | 0.0 | 0.0 |
| 3411 | 0.0 | 0.0 | 3.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
| 3412 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
3401 rows × 162 columns
# Checking the number of NaN values in each column of the marine_data dataframe after NaN fill
print(marine_data.isna().sum().to_string())
CommunityName 0 BeachName 0 BeachCountrycode 249 BeachRegionalSea 0 BeachLength_m 0 BeachLocation 1738 BeachType 1716 EventDate 0 EventType 0 NatRef 3287 lon_x1 0 lat_y1 0 lon_x2 0 lat_y2 0 G1 0 G3 0 G4 0 G5 0 G7 0 G8 0 G9 0 G10 0 G11 0 G12 0 G13 0 G14 0 G15 0 G16 0 G17 0 G18 0 G19 0 G21 0 G22 0 G23 0 G24 0 G25 0 G26 0 G27 0 G28 0 G29 0 G30 0 G31 0 G32 0 G33 0 G34 0 G35 0 G36 0 G37 0 G40 0 G41 0 G42 0 G43 0 G44 0 G45 0 G46 0 G47 0 G49 0 G50 0 G52 0 G53 0 G54 0 G56 0 G57 0 G58 0 G59 0 G60 0 G62 0 G63 0 G64 0 G65 0 G66 0 G67 0 G68 0 G69 0 G70 0 G71 0 G72 0 G73 0 G76 0 G77 0 G79 0 G80 0 G82 0 G83 0 G84 0 G85 0 G86 0 G87 0 G88 0 G89 0 G90 0 G91 0 G92 0 G93 0 G95 0 G96 0 G97 0 G98 0 G99 0 G100 0 G101 0 G102 0 G124 0 G125 0 G126 0 G127 0 G128 0 G129 0 G130 0 G131 0 G132 0 G133 0 G134 0 G137 0 G138 0 G139 0 G140 0 G141 0 G142 0 G143 0 G144 0 G145 0 G147 0 G148 0 G150 0 G151 0 G152 0 G153 0 G154 0 G155 0 G156 0 G158 0 G159 0 G160 0 G161 0 G162 0 G163 0 G164 0 G165 0 G166 0 G167 0 G171 0 G172 0 G174 0 G175 0 G176 0 G177 0 G178 0 G179 0 G180 0 G181 0 G182 0 G184 0 G186 0 G187 0 G188 0 G189 0 G190 0 G191 0 G193 0 G194 0 G195 0 G198 0 G199 0 G200 0 G201 0 G202 0 G203 0 G204 0 G205 0 G206 0 G207 0 G208 0 G210 0 G211 0 G213 0
marine_data['CommunityName'].nunique()
54
marine_data['EventType'].value_counts()
Cleanup 2611 Monitoring 790 Name: EventType, dtype: int64
marine_data.groupby(['EventType'])['BeachRegionalSea'].value_counts()
EventType BeachRegionalSea
Cleanup Mediterranean Sea 1509
North-east Atlantic Ocean 660
Black Sea 170
Unknown 138
Baltic Sea 134
Monitoring Mediterranean Sea 478
North-east Atlantic Ocean 155
Black Sea 98
Baltic Sea 46
Unknown 13
Name: BeachRegionalSea, dtype: int64
# Creating dataframe with only generalcode tally totals in marine_data dataframe
code_totals = marine_data.loc[:,'G1':'G213'].sum(axis=0,skipna=True).reset_index()
code_totals
| index | 0 | |
|---|---|---|
| 0 | G1 | 3793.0 |
| 1 | G3 | 84974.0 |
| 2 | G4 | 22793.0 |
| 3 | G5 | 4628.0 |
| 4 | G7 | 34186.0 |
| ... | ... | ... |
| 157 | G207 | 350.0 |
| 158 | G208 | 63418.0 |
| 159 | G210 | 8471.0 |
| 160 | G211 | 1306.0 |
| 161 | G213 | 14184.0 |
162 rows × 2 columns
# Changing the column names of the code_totals dataframe
code_totals.rename(columns={"index":"generalcode",0:"total"},
inplace=True)
code_totals
| generalcode | total | |
|---|---|---|
| 0 | G1 | 3793.0 |
| 1 | G3 | 84974.0 |
| 2 | G4 | 22793.0 |
| 3 | G5 | 4628.0 |
| 4 | G7 | 34186.0 |
| ... | ... | ... |
| 157 | G207 | 350.0 |
| 158 | G208 | 63418.0 |
| 159 | G210 | 8471.0 |
| 160 | G211 | 1306.0 |
| 161 | G213 | 14184.0 |
162 rows × 2 columns
# Creating a dataframe merging the marine_meta and code_totals dataframes together
sub_category_totals = pd.merge(marine_meta,code_totals)
sub_category_totals
| generalcode | category | generalname | total | |
|---|---|---|---|---|
| 0 | G1 | Plastic | 4/6-pack yokes, six-pack rings | 3793.0 |
| 1 | G3 | Plastic | Shopping Bags incl. pieces | 84974.0 |
| 2 | G4 | Plastic | Small plastic bags, e.g. freezer bags incl. pi... | 22793.0 |
| 3 | G5 | Plastic | Plastic bags collective role; what remains fro... | 4628.0 |
| 4 | G7 | Plastic | Drink bottles <=0.5l | 34186.0 |
| ... | ... | ... | ... | ... |
| 157 | G207 | Glass/ceramics | Octopus pots | 350.0 |
| 158 | G208 | Glass/ceramics | Glass or ceramic fragments > 2.5 cm | 63418.0 |
| 159 | G210 | Glass/ceramics | Other glass items | 8471.0 |
| 160 | G211 | unidentified | Other medical items (swabs, bandaging, adhesiv... | 1306.0 |
| 161 | G213 | Chemicals | Paraffin/Wax | 14184.0 |
162 rows × 4 columns
# Creating a plotly table using the Category Totals dataframe
row_color1='white'
row_color2='lightsteelblue'
sub_category_fig = go.Figure(data=[go.Table(
header=dict(values=[['General Code'],['Category'],['General Name'],['Totals']],
fill_color='lightgrey',
line_color='darkslategray',
align='left'),
cells=dict(values=[sub_category_totals.generalcode, sub_category_totals.category,
sub_category_totals.generalname, sub_category_totals.total],
fill_color=[[row_color1,row_color2]*81],# 81 is half the number of rows in sub_category_totals dataframe
line_color='darkslategray',
align='left'))
])
sub_category_fig.update_layout(
showlegend=False,
title_text="<b>Marine Litter Sub-Category Totals (Itemized)</b>"
)
sub_category_fig.show()
# Create dataframe with category totals
category_totals = sub_category_totals.groupby('category').agg({'total':'sum'}).reset_index()
# Create percentage column
category_totals['% of Litter Collected'] = (category_totals['total']) / (category_totals['total'].sum()) * 100
# Round % to two decimal places
category_totals['% of Litter Collected'] = category_totals['% of Litter Collected'].round(decimals=2)
category_totals
| category | total | % of Litter Collected | |
|---|---|---|---|
| 0 | Chemicals | 14184.0 | 0.68 |
| 1 | Cloth/textile | 30066.0 | 1.45 |
| 2 | Glass/ceramics | 112504.0 | 5.43 |
| 3 | Metal | 82404.0 | 3.97 |
| 4 | Paper/Cardboard | 42871.0 | 2.07 |
| 5 | Plastic | 1758360.0 | 84.82 |
| 6 | Processed/worked wood | 18899.0 | 0.91 |
| 7 | Rubber | 12482.0 | 0.60 |
| 8 | unidentified | 1306.0 | 0.06 |
category_fig = go.Figure(data=[go.Table(
columnorder = [1,2],
columnwidth = [80,70],
header = dict(
values = [['<b>Plastic</b> made up <b>84.82%</b> <br>of all<b> Litter Collected</b>'],
['<b>54</b> communities participated']],
line_color='white',
fill_color='white',
align='left',
font=dict(color='#0000FF',size=35),
height=40),
cells=dict(values=[['<b>2611</b> Cleanup events'],
['<b>790</b> Monitoring events']],
line_color='white',
fill_color='white',
align='left',
font=dict(color='#0000FF',size=35),
height=40,)
)
])
category_fig.update_layout(width=1085, height=450)
category_fig.show()
top_litter_items = sub_category_totals.sort_values(['category','total'],ascending=[True,False]).groupby('category').head(1)
top_litter_items
| generalcode | category | generalname | total | |
|---|---|---|---|---|
| 161 | G213 | Chemicals | Paraffin/Wax | 14184.0 |
| 107 | G145 | Cloth/textile | Other textiles (incl. rags) | 13834.0 |
| 158 | G208 | Glass/ceramics | Glass or ceramic fragments > 2.5 cm | 63418.0 |
| 133 | G178 | Metal | Bottle caps, lids & pull tabs | 26295.0 |
| 117 | G158 | Paper/Cardboard | Other paper items | 11943.0 |
| 23 | G27 | Plastic | Cigarette butts and filters | 503348.0 |
| 127 | G171 | Processed/worked wood | Other wood < 50 cm | 5931.0 |
| 89 | G125 | Rubber | Balloons and balloon sticks | 4524.0 |
| 160 | G211 | unidentified | Other medical items (swabs, bandaging, adhesiv... | 1306.0 |
category_totals
| category | total | % of Litter Collected | |
|---|---|---|---|
| 0 | Chemicals | 14184.0 | 0.68 |
| 1 | Cloth/textile | 30066.0 | 1.45 |
| 2 | Glass/ceramics | 112504.0 | 5.43 |
| 3 | Metal | 82404.0 | 3.97 |
| 4 | Paper/Cardboard | 42871.0 | 2.07 |
| 5 | Plastic | 1758360.0 | 84.82 |
| 6 | Processed/worked wood | 18899.0 | 0.91 |
| 7 | Rubber | 12482.0 | 0.60 |
| 8 | unidentified | 1306.0 | 0.06 |
# Merging category totals with top litter items by category
category_totals = pd.merge(category_totals,top_litter_items,on='category')
# Renaming the columns
category_totals.rename(columns={'total_x':'litter_totals','generalname':'top_litter_item','total_y':'top_litter_totals'},inplace=True)
# Creating the % of Litter Collected by category column
category_totals['% of Litter Collected (Category)'] = category_totals['top_litter_totals']/category_totals['litter_totals']*100
# Rounding the column to two decimal places
category_totals['% of Litter Collected (Category)'] = category_totals['% of Litter Collected (Category)'].round(decimals=2)
category_totals
| category | litter_totals | % of Litter Collected | generalcode | top_litter_item | top_litter_totals | % of Litter Collected (Category) | |
|---|---|---|---|---|---|---|---|
| 0 | Chemicals | 14184.0 | 0.68 | G213 | Paraffin/Wax | 14184.0 | 100.00 |
| 1 | Cloth/textile | 30066.0 | 1.45 | G145 | Other textiles (incl. rags) | 13834.0 | 46.01 |
| 2 | Glass/ceramics | 112504.0 | 5.43 | G208 | Glass or ceramic fragments > 2.5 cm | 63418.0 | 56.37 |
| 3 | Metal | 82404.0 | 3.97 | G178 | Bottle caps, lids & pull tabs | 26295.0 | 31.91 |
| 4 | Paper/Cardboard | 42871.0 | 2.07 | G158 | Other paper items | 11943.0 | 27.86 |
| 5 | Plastic | 1758360.0 | 84.82 | G27 | Cigarette butts and filters | 503348.0 | 28.63 |
| 6 | Processed/worked wood | 18899.0 | 0.91 | G171 | Other wood < 50 cm | 5931.0 | 31.38 |
| 7 | Rubber | 12482.0 | 0.60 | G125 | Balloons and balloon sticks | 4524.0 | 36.24 |
| 8 | unidentified | 1306.0 | 0.06 | G211 | Other medical items (swabs, bandaging, adhesiv... | 1306.0 | 100.00 |
# Creating a plotly table using the category_totals dataframe
category_totals_fig = go.Figure(data=[go.Table(
header=dict(values=[['Category'],['Litter Totals'],['% of Litter Collected'],['Top Litter Item'],
['Item Totals'],['% of Litter Collected (Category)']],
fill_color='lightgrey',
line_color='darkslategray',
align='left'),
cells=dict(values=[category_totals.category, category_totals.litter_totals, category_totals['% of Litter Collected'],
category_totals.top_litter_item, category_totals.top_litter_totals,
category_totals['% of Litter Collected (Category)']],
fill_color=[[row_color1,row_color2]*5],
line_color='darkslategray',
align='left'))
])
category_totals_fig.update_layout(
showlegend=False,height=650,
title_text="<b>Category Totals</b>")
category_totals_fig.show()
# Summing each litter sub-category (column-wise: all G1's, G2's, etc.) together grouping by oceanbody and year
litter_totals = marine_data.loc[:,'G1':'G213'].groupby([marine_data.BeachRegionalSea,
marine_data.EventDate.dt.year]).sum().reset_index()
litter_totals
| BeachRegionalSea | EventDate | G1 | G3 | G4 | G5 | G7 | G8 | G9 | G10 | ... | G202 | G203 | G204 | G205 | G206 | G207 | G208 | G210 | G211 | G213 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Baltic Sea | 2013 | 19.0 | 56.0 | 42.0 | 3.0 | 25.0 | 61.0 | 9.0 | 173.0 | ... | 0.0 | 0.0 | 6.0 | 0.0 | 0.0 | 0.0 | 5.0 | 0.0 | 4.0 | 0.0 |
| 1 | Baltic Sea | 2014 | 73.0 | 32.0 | 261.0 | 0.0 | 44.0 | 25.0 | 3.0 | 46.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16.0 | 0.0 | 0.0 | 1.0 |
| 2 | Baltic Sea | 2015 | 0.0 | 1027.0 | 22.0 | 0.0 | 36.0 | 23.0 | 2.0 | 60.0 | ... | 0.0 | 0.0 | 44.0 | 0.0 | 0.0 | 0.0 | 43.0 | 0.0 | 1.0 | 1.0 |
| 3 | Baltic Sea | 2016 | 0.0 | 78.0 | 56.0 | 50.0 | 26.0 | 16.0 | 1.0 | 35.0 | ... | 1.0 | 1.0 | 187.0 | 0.0 | 0.0 | 0.0 | 43.0 | 14.0 | 5.0 | 9135.0 |
| 4 | Baltic Sea | 2017 | 1.0 | 73.0 | 281.0 | 197.0 | 64.0 | 37.0 | 0.0 | 82.0 | ... | 1.0 | 2.0 | 205.0 | 0.0 | 0.0 | 5.0 | 3784.0 | 15.0 | 46.0 | 31.0 |
| 5 | Baltic Sea | 2018 | 0.0 | 54.0 | 78.0 | 58.0 | 30.0 | 12.0 | 1.0 | 37.0 | ... | 1.0 | 0.0 | 188.0 | 0.0 | 0.0 | 0.0 | 625.0 | 2.0 | 1.0 | 127.0 |
| 6 | Baltic Sea | 2019 | 1.0 | 34.0 | 190.0 | 117.0 | 23.0 | 16.0 | 9.0 | 52.0 | ... | 0.0 | 0.0 | 42.0 | 0.0 | 0.0 | 0.0 | 44.0 | 1.0 | 22.0 | 4.0 |
| 7 | Baltic Sea | 2020 | 0.0 | 14.0 | 61.0 | 0.0 | 5.0 | 4.0 | 0.0 | 9.0 | ... | 0.0 | 0.0 | 21.0 | 0.0 | 0.0 | 0.0 | 13.0 | 0.0 | 0.0 | 10.0 |
| 8 | Black Sea | 2014 | 1.0 | 87.0 | 636.0 | 14.0 | 98.0 | 134.0 | 1.0 | 12.0 | ... | 1.0 | 0.0 | 79.0 | 0.0 | 0.0 | 0.0 | 78.0 | 73.0 | 2.0 | 0.0 |
| 9 | Black Sea | 2015 | 7.0 | 672.0 | 1055.0 | 20.0 | 1346.0 | 1472.0 | 54.0 | 434.0 | ... | 29.0 | 15.0 | 111.0 | 7.0 | 0.0 | 0.0 | 469.0 | 188.0 | 23.0 | 1.0 |
| 10 | Black Sea | 2016 | 29.0 | 165.0 | 1396.0 | 38.0 | 442.0 | 629.0 | 55.0 | 171.0 | ... | 16.0 | 11.0 | 773.0 | 5.0 | 0.0 | 0.0 | 52.0 | 7.0 | 29.0 | 2.0 |
| 11 | Black Sea | 2017 | 0.0 | 126.0 | 1504.0 | 181.0 | 142.0 | 115.0 | 4.0 | 45.0 | ... | 1.0 | 28.0 | 1014.0 | 0.0 | 0.0 | 0.0 | 61.0 | 7.0 | 8.0 | 1.0 |
| 12 | Black Sea | 2018 | 8.0 | 198.0 | 1532.0 | 103.0 | 432.0 | 496.0 | 76.0 | 214.0 | ... | 2.0 | 25.0 | 105.0 | 2.0 | 1.0 | 0.0 | 551.0 | 3.0 | 19.0 | 8.0 |
| 13 | Black Sea | 2019 | 6.0 | 817.0 | 1926.0 | 121.0 | 1205.0 | 715.0 | 206.0 | 397.0 | ... | 23.0 | 5.0 | 672.0 | 6.0 | 0.0 | 0.0 | 2128.0 | 39.0 | 72.0 | 6.0 |
| 14 | Black Sea | 2020 | 4.0 | 505.0 | 1151.0 | 130.0 | 352.0 | 528.0 | 94.0 | 204.0 | ... | 8.0 | 3.0 | 373.0 | 3.0 | 0.0 | 0.0 | 583.0 | 36.0 | 23.0 | 4.0 |
| 15 | Black Sea | 2021 | 0.0 | 2.0 | 6.0 | 4.0 | 21.0 | 11.0 | 0.0 | 5.0 | ... | 0.0 | 2.0 | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 0.0 |
| 16 | Mediterranean Sea | 2013 | 9.0 | 173.0 | 457.0 | 2.0 | 116.0 | 12.0 | 6.0 | 359.0 | ... | 0.0 | 0.0 | 23.0 | 0.0 | 0.0 | 0.0 | 263.0 | 33.0 | 5.0 | 0.0 |
| 17 | Mediterranean Sea | 2014 | 34.0 | 973.0 | 172.0 | 11.0 | 965.0 | 903.0 | 139.0 | 590.0 | ... | 5.0 | 8.0 | 494.0 | 0.0 | 0.0 | 0.0 | 1770.0 | 75.0 | 9.0 | 2.0 |
| 18 | Mediterranean Sea | 2015 | 35.0 | 6833.0 | 1406.0 | 29.0 | 4253.0 | 2986.0 | 295.0 | 3120.0 | ... | 10.0 | 1.0 | 65.0 | 0.0 | 0.0 | 0.0 | 4017.0 | 83.0 | 28.0 | 0.0 |
| 19 | Mediterranean Sea | 2016 | 139.0 | 1543.0 | 340.0 | 35.0 | 3204.0 | 3680.0 | 442.0 | 798.0 | ... | 218.0 | 2.0 | 292.0 | 0.0 | 0.0 | 0.0 | 1126.0 | 670.0 | 65.0 | 0.0 |
| 20 | Mediterranean Sea | 2017 | 1514.0 | 3812.0 | 3388.0 | 1609.0 | 4519.0 | 4077.0 | 1176.0 | 3334.0 | ... | 61.0 | 307.0 | 3190.0 | 20.0 | 178.0 | 176.0 | 13374.0 | 364.0 | 357.0 | 154.0 |
| 21 | Mediterranean Sea | 2018 | 1389.0 | 3059.0 | 2512.0 | 1730.0 | 3827.0 | 3038.0 | 1041.0 | 2389.0 | ... | 45.0 | 205.0 | 2117.0 | 16.0 | 169.0 | 168.0 | 6928.0 | 1026.0 | 287.0 | 16.0 |
| 22 | Mediterranean Sea | 2019 | 28.0 | 11836.0 | 53.0 | 3.0 | 2457.0 | 2343.0 | 32.0 | 3718.0 | ... | 5.0 | 1.0 | 36.0 | 0.0 | 1.0 | 0.0 | 6181.0 | 0.0 | 10.0 | 0.0 |
| 23 | Mediterranean Sea | 2020 | 38.0 | 184.0 | 58.0 | 24.0 | 93.0 | 129.0 | 0.0 | 25.0 | ... | 0.0 | 0.0 | 10.0 | 0.0 | 0.0 | 0.0 | 39.0 | 2.0 | 80.0 | 0.0 |
| 24 | Mediterranean Sea | 2021 | 0.0 | 60.0 | 0.0 | 0.0 | 21.0 | 7.0 | 0.0 | 6.0 | ... | 0.0 | 0.0 | 36.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 25 | North-east Atlantic Ocean | 2013 | 4.0 | 238.0 | 4.0 | 0.0 | 103.0 | 0.0 | 0.0 | 135.0 | ... | 1.0 | 0.0 | 22.0 | 0.0 | 0.0 | 0.0 | 23.0 | 0.0 | 2.0 | 24.0 |
| 26 | North-east Atlantic Ocean | 2014 | 357.0 | 867.0 | 566.0 | 3.0 | 697.0 | 308.0 | 7.0 | 1156.0 | ... | 0.0 | 7.0 | 22.0 | 0.0 | 0.0 | 0.0 | 520.0 | 49.0 | 33.0 | 3.0 |
| 27 | North-east Atlantic Ocean | 2015 | 31.0 | 8182.0 | 885.0 | 1.0 | 2683.0 | 1749.0 | 89.0 | 3287.0 | ... | 10.0 | 1.0 | 84.0 | 0.0 | 0.0 | 0.0 | 6347.0 | 198.0 | 17.0 | 91.0 |
| 28 | North-east Atlantic Ocean | 2016 | 23.0 | 1508.0 | 671.0 | 8.0 | 1137.0 | 494.0 | 33.0 | 713.0 | ... | 7.0 | 3.0 | 195.0 | 1.0 | 0.0 | 0.0 | 1339.0 | 198.0 | 49.0 | 1929.0 |
| 29 | North-east Atlantic Ocean | 2017 | 5.0 | 755.0 | 352.0 | 94.0 | 662.0 | 58.0 | 180.0 | 525.0 | ... | 4.0 | 2.0 | 278.0 | 0.0 | 0.0 | 0.0 | 358.0 | 130.0 | 22.0 | 2290.0 |
| 30 | North-east Atlantic Ocean | 2018 | 21.0 | 1079.0 | 581.0 | 1.0 | 723.0 | 48.0 | 81.0 | 430.0 | ... | 2.0 | 4.0 | 723.0 | 0.0 | 0.0 | 0.0 | 449.0 | 2019.0 | 44.0 | 66.0 |
| 31 | North-east Atlantic Ocean | 2019 | 7.0 | 32013.0 | 667.0 | 20.0 | 1421.0 | 1991.0 | 49.0 | 12690.0 | ... | 5.0 | 40.0 | 248.0 | 1.0 | 1.0 | 1.0 | 9607.0 | 2744.0 | 17.0 | 135.0 |
| 32 | North-east Atlantic Ocean | 2020 | 3.0 | 62.0 | 116.0 | 1.0 | 17.0 | 9.0 | 9.0 | 87.0 | ... | 2.0 | 21.0 | 145.0 | 1.0 | 0.0 | 0.0 | 94.0 | 6.0 | 1.0 | 141.0 |
| 33 | Unknown | 2014 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 34 | Unknown | 2015 | 0.0 | 2942.0 | 217.0 | 0.0 | 900.0 | 370.0 | 0.0 | 227.0 | ... | 0.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 476.0 | 0.0 | 0.0 | 0.0 |
| 35 | Unknown | 2016 | 0.0 | 433.0 | 0.0 | 0.0 | 51.0 | 0.0 | 12.0 | 48.0 | ... | 10.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 470.0 | 0.0 | 0.0 |
| 36 | Unknown | 2017 | 0.0 | 11.0 | 45.0 | 0.0 | 3.0 | 2.0 | 0.0 | 4.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 57.0 | 0.0 | 0.0 | 2.0 |
| 37 | Unknown | 2018 | 7.0 | 85.0 | 96.0 | 21.0 | 310.0 | 404.0 | 54.0 | 98.0 | ... | 0.0 | 3.0 | 90.0 | 0.0 | 0.0 | 0.0 | 314.0 | 19.0 | 20.0 | 0.0 |
| 38 | Unknown | 2019 | 0.0 | 4366.0 | 5.0 | 0.0 | 1312.0 | 2030.0 | 0.0 | 6442.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1621.0 | 0.0 | 0.0 | 0.0 |
| 39 | Unknown | 2020 | 0.0 | 19.0 | 5.0 | 0.0 | 420.0 | 3516.0 | 28.0 | 16.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 20.0 | 0.0 | 0.0 | 0.0 |
40 rows × 164 columns
# Concatenating litter totals with itself, attaching the BeachRegionalSea and EventDate columns with the litter
# sub-category columns (while summing the total (row-wise: G1 through G213) for each year and ocean body to return yearly total)
litter_totals = pd.concat([litter_totals.loc[:,'BeachRegionalSea':'EventDate'],
litter_totals.loc[:,'G1':'G213'].sum(axis=1)],axis=1)
# Changing the column names
litter_totals.rename(columns={'BeachRegionalSea':'Oceanbody','EventDate':'Year',0:'LitterTotals'},inplace=True)
# Checking litter_totals dataframe
litter_totals.head()
| Oceanbody | Year | LitterTotals | |
|---|---|---|---|
| 0 | Baltic Sea | 2013 | 2246.0 |
| 1 | Baltic Sea | 2014 | 2606.0 |
| 2 | Baltic Sea | 2015 | 2405.0 |
| 3 | Baltic Sea | 2016 | 10830.0 |
| 4 | Baltic Sea | 2017 | 28054.0 |
litter_graph = px.bar(litter_totals,x='Year',y='LitterTotals',color='Oceanbody',
title='Yearly Collection Totals',
labels={'LitterTotals':'Litter Collected (Sum)','Oceanbody':'Ocean/Sea'},
color_discrete_map={'Baltic Sea':'#007FFF',
'Black Sea':'#2A52BE',
'Mediterranean Sea':'#0000FF',
'North-east Atlantic Ocean':'#3F3F68',
'Unknown':'#191970'})
litter_graph.update_layout(xaxis_title=None,
updatemenus=[go.layout.Updatemenu(
active=0,
buttons=list(
[dict(label = 'All',
method = 'update',
args = [{'visible': [True, True, True, True, True]},
{'title': '<b>Litter Collection Totals</b>',
'showlegend':True}]),
dict(label = 'Baltic Sea',
method = 'update',
args = [{'visible': [True, False, False, False, False]},
{'title': '<b>Litter Collection Totals - Baltic Sea</b>',
'showlegend':False}]),
dict(label = 'Black Sea',
method = 'update',
args = [{'visible': [False, True, False, False, False]},
{'title': '<b>Litter Collection Totals - Black Sea</b>',
'showlegend':False}]),
dict(label = 'Mediterranean Sea',
method = 'update',
args = [{'visible': [False, False, True, False, False]},
{'title': '<b>Litter Collection Totals - Mediterranean Sea</b>',
'showlegend':False}]),
dict(label = 'North-east Atlantic Ocean',
method = 'update',
args = [{'visible': [False, False, False, True, False]},
{'title': '<b>Litter Collection Totals - North-east Atlantic Ocean</b>',
'showlegend':False}]),
dict(label = 'Unknown',
method = 'update',
args = [{'visible': [False, False, False, False, True]},
{'title': '<b>Litter Collection Totals - Unknown</b>',
'showlegend':False}]),
]),
type = "buttons",
direction="right",
pad={"r": 25, "t": 25},
x=0.5,
y=1.02,
showactive=True,
xanchor="center",
yanchor="bottom",
)
])
litter_graph.show()
# Showing the total number (NumofEvents) for each event (Cleanup/Monitoring) for each year/each BeachRegionalSea and
# the sum of litter counts for each type of litter (G1,G3,G4,etc.)
event_type_data = marine_data.groupby([marine_data.EventDate.dt.month,marine_data.EventDate.dt.year,
'EventType','BeachRegionalSea',
'lon_x2','lat_y2']).agg({'EventType':'count'}).reset_index(allow_duplicates=True)
event_type_data.columns.values[0] = 'Month'
event_type_data.columns.values[1] = 'Year'
event_type_data.columns.values[2] = 'EventType'
event_type_data.columns.values[3] = 'BeachRegionalSea'
event_type_data.columns.values[4] = 'lon_x1'
event_type_data.columns.values[5] = 'lat_y1'
event_type_data.columns.values[6] = 'NumofEvents(Count)'
event_type_data.head(15)
| Month | Year | EventType | BeachRegionalSea | lon_x1 | lat_y1 | NumofEvents(Count) | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 2014 | Cleanup | Mediterranean Sea | 9.886536 | 44.083107 | 1 |
| 1 | 1 | 2014 | Cleanup | North-east Atlantic Ocean | -1.320040 | 44.112300 | 1 |
| 2 | 1 | 2015 | Cleanup | Black Sea | 27.668371 | 42.409241 | 1 |
| 3 | 1 | 2015 | Cleanup | Black Sea | 27.709501 | 42.366164 | 2 |
| 4 | 1 | 2015 | Cleanup | Black Sea | 28.589187 | 43.809237 | 1 |
| 5 | 1 | 2015 | Cleanup | Black Sea | 28.589472 | 43.809385 | 1 |
| 6 | 1 | 2015 | Cleanup | Black Sea | 28.592534 | 43.829550 | 1 |
| 7 | 1 | 2015 | Cleanup | Black Sea | 28.592712 | 43.827780 | 1 |
| 8 | 1 | 2015 | Cleanup | Black Sea | 28.593001 | 43.827915 | 1 |
| 9 | 1 | 2015 | Cleanup | Black Sea | 28.629170 | 44.230505 | 1 |
| 10 | 1 | 2015 | Cleanup | Mediterranean Sea | 2.555432 | 39.699948 | 1 |
| 11 | 1 | 2015 | Cleanup | Mediterranean Sea | 10.265680 | 43.790288 | 1 |
| 12 | 1 | 2015 | Cleanup | Mediterranean Sea | 26.968493 | 37.698421 | 1 |
| 13 | 1 | 2016 | Cleanup | Mediterranean Sea | 10.026400 | 44.042900 | 1 |
| 14 | 1 | 2016 | Cleanup | Mediterranean Sea | 23.904980 | 37.736122 | 2 |
# Creating a map visualation displaying the locations of litter events
event_map = px.scatter_geo(event_type_data, lat='lat_y1',lon='lon_x1',color='EventType',
labels={'EventType':'Event Type'},color_discrete_sequence=['blue','red'],
size="NumofEvents(Count)",height=600,hover_name='EventType',hover_data=['Month','Year'],
projection="natural earth",title='Litter Event Locations')
event_map.update_layout(xaxis_title=None,
updatemenus=[go.layout.Updatemenu(
active=0,
buttons=list(
[dict(label = 'Both',
method = 'update',
args = [{'visible': [True, True]},
{'title': '<b>Litter Event Locations</b>',
'showlegend':True}]),
dict(label = 'Cleanup',
method = 'update',
args = [{'visible': [True, False]},
{'title': '<b>Litter Event Locations - Cleaning</b>',
'showlegend':True}]),
dict(label = 'Monitoring',
method = 'update',
args = [{'visible': [False, True]},
{'title': '<b>Litter Event Locations - Monitoring</b>',
'showlegend':True}])
]),
type = "buttons",
direction="right",
pad={"r": 25, "t": 25},
x=0.5,
y=1.02,
showactive=True,
xanchor="center",
yanchor="bottom"
)
])
event_map.update_geos(showrivers=True,showlakes=True,rivercolor='slateblue',lakecolor='slateblue',showcountries=True)
event_map.show()